home *** CD-ROM | disk | FTP | other *** search
- /* National Institute of Standards and Technology (NIST)
- /* National Computer System Laboratory (NCSL)
- /* Office Systems Engineering (OSE) Group
- /* ********************************************************************
- /* D I S C L A I M E R
- /* (March 8, 1989)
- /*
- /* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
- /* NCSL OSE SGML parser validation suite. If the SGML parser and/or
- /* validation suite is modified by someone else and passed on, NIST wants
- /* the parser's recipients to know that what they have is not what NIST
- /* distributed, so that any problems introduced by others will not
- /* reflect on our reputation.
- /*
- /* Policies
- /*
- /* 1. Anyone may copy and distribute verbatim copies of the SGML source
- /* code as received in any medium.
- /*
- /* 2. Anyone may modify your copy or copies of SGML parser source code or
- /* any portion of it, and copy and distribute such modifications provided
- /* that all modifications are clearly associated with the entity that
- /* performs the modifications.
- /*
- /* NO WARRANTY
- /* ===========
- /*
- /* NIST PROVIDES ABSOLUTELY NO WARRANTY. THE SGML PARSER AND VALIDATION
- /* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
- /* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- /* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
- /* WITH YOU. SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
- /* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
- /*
- /* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
- /* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
- /* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- /* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
- /* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
- /* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
- /* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
- /* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- */
-
- /************************************************************************/
- /* TITLE: SGML Parser */
- /* SYSTEM: Document Processor */
- /* SUBSYSTEM: */
- /* SOURCE FILE: DEFS.H */
- /* AUTHOR: Steve Lindeman, Fred Maples */
- /* */
- /* DATE CREATED: */
- /* LAST MODIFIED: */
- /* */
- /* REVISIONS */
- /* WHEN WHO WHY */
- /************************************************************************/
-
- #include "qntyset.h" /* copied straight out of standard */
- #include "dtd.h" /* Jim's definitions for document type definition */
- #include "semantic.h"
-
/*
 * Read-state selector.
 * NOTE(review): the names suggest "reuse the previous item" versus "fetch
 * a new one" -- confirm against the code that consumes READ_STATE.
 */
typedef enum read_state {
    GETOLD = 0,
    GETNEW = 1
} READ_STATE;

/*
 * Lookup status.  FOUND is 0; NFDHT and NFSH are presumably two distinct
 * "not found" outcomes -- their exact meaning is not documented here.
 */
typedef enum status {
    FOUND = 0,
    NFDHT = 1,
    NFSH  = 2
} STATUS;

/* Tokenizer return value: plain text, or markup was encountered. */
typedef enum tknretval {
    TEXT         = 0,
    MARKUP_FOUND = 1
} TKNRETVAL;

/* Occurrence indicator codes; OI_IS_NULL is the "no indicator" value. */
typedef enum occurind {
    PLUS       = 0,
    ONE        = 1,
    OPT        = 2,
    OI_IS_NULL = 3
} OCCURIND;

/* Truth values are carried in a plain int throughout this header. */
#define BOOLEAN int
-
- typedef struct groupdesc { /* structure for entry into linked */
- char groupname[NAMELEN+1]; /* list containing name token group */
- struct groupdesc *next; /* items */
- } GROUPDESC,*GROUPPTR;
-
- typedef struct attrdesc { /* structure for entry into attribute */
- struct attrdesc *next; /* linked list used in start tags */
- BOOLEAN processed;
- char attrname[NAMELEN+1];
- DECLVAL dvcode;
- ADFLT defcode;
- union {
- char *currdef;
- GROUPDESC *currgrp;
- } u2;
- GROUPDESC *groupp;
- } ATTRDESC,*ATTRPTR;
-
- typedef struct tnode { /* structure for entry into binary */
- int nodeid; /* tree describing content model for */
- char occurind; /* generic identifiers */
- char copyoi;
- enum CONTEXT contreq;
- enum CONTEXT copycontreq;
- BOOLEAN changed;
- BOOLEAN contref_attr;
- struct tnode *left;
- union {
- struct tnode *right;
- struct tnode *llptr;
- } u;
- struct tnode *next;
- } TNODE,*TPTR;
-
- typedef struct exceptdesc {
- int tokenid;
- BOOLEAN changed;
- struct exceptdesc *nextlocal;
- struct exceptdesc *nextglobal;
- } EXCEPTDESC,*EXCEPTPTR;
-
- typedef struct stentry { /* structure for entry in symbol */
- char nametoken[NAMELEN+1]; /* table containing generic identifier, */
- int tokenid; /* its token value and associated ptrs */
- TNODE *cmptr; /* to more information */
- int miniexcept;
- ATTRDESC *adptr;
- int num_open;
- int content_type;
- EXCEPTDESC *inclusion;
- EXCEPTDESC *exclusion;
- } STENTRY,*STPTR;
-
- typedef struct entitydesc { /* structure for entry in internal */
- char entityname[NAMELEN+1]; /* general entity linked list */
- char *entityvalue;
- int entitytype;
- struct entitydesc *next;
- } ENTITYDESC,*ENTITYPTR;
-
- typedef struct id_idref_desc { /* structure for entry in ID or */
- char name[NAMELEN+1]; /* IDREF linked list */
- struct id_idref_desc *next;
- } ID_IDREF_DESC,*ID_IDREF_PTR;
-
/* Linked element holding one input character code (kept as an int). */
typedef struct input_stack INPUT_STACK;
typedef struct input_stack *INPUT_STACK_PTR;

struct input_stack {
    int inchar;
    struct input_stack *next;     /* following element */
};
-
- typedef struct out_queue {
- int code;
- char str1[ATTSPLEN];
- char str2[ATTSPLEN];
- struct out_queue *next;
- } OUT_QUEUE,*OUT_QUEUE_PTR;
-
/* Valid length for a path description. */
#define PATHLEN (64)

/*
 * Machine dependent: bit position treated as the high-order bit of an
 * integer.  HIGHBIT is the mask with only that bit set; the IS_STARTTAG /
 * IS_ENDTAG macros in this header test it.
 * NOTE(review): where int is 16 bits wide, 1 << 15 overflows a signed int.
 */
#define INTSIZE 15
#define HIGHBIT (1 << INTSIZE)
-
/* Size limits; ERRMSGLEN and MAXCOL are plain counts. */
#define ERRMSGLEN 80
#define MAXCOL 110

/*
 * Directory separator character for the build target.
 * Unix-like targets use '/'; every other target keeps the backslash this
 * header has always used, so DOS-style builds are unaffected.
 * (The identifier keeps its historical spelling because callers use it.)
 */
#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__)
#define DIR_SEPERATOR '/'
#else
#define DIR_SEPERATOR '\\'
#endif
-
/*
 * Negative codes for the keywords found in a content model definition.
 * The value -2 is not used here; OUR_EE in this header is defined as -2.
 */
#define PCDATA  (-1)
#define RCDATA  (-3)    /* replaceable character data */
#define CDATA   (-4)    /* character data */
#define EMPTY   (-5)    /* EMPTY content model */
#define ANY     (-6)
-
/* Error limit; how the limit is enforced is outside this header. */
#define MAX_ERRORS  (1)
-
/*
 * Delimiter codes.  They are consecutive integers starting at 100, each
 * defined as its predecessor plus one.
 */
#define STAGO    (100)          /* start tag open */
#define ETAGO    (STAGO+1)      /* end tag open */
#define TAGC     (ETAGO+1)      /* tag close */
#define NODELIM  (TAGC+1)       /* getdelim result: no delimiter found */
#define PIO      (NODELIM+1)    /* processing instruction open */
#define MDO      (PIO+1)        /* markup declaration open */
-
/*
 * Character-set dependent values.  Everything here assumes ASCII and
 * would have to change for a different character set.
 */
#define VI            '='
#define MARKUP_END    '>'
#define MARKUP_START  '<'
#define SEPCHAR       0x9

/* Lowest and highest ASCII character codes. */
#define CHARSET_LOWBOUND   0
#define CHARSET_HIGHBOUND  127
-
/*
 * CRs are not significant after declarations: skip them on `indoc`.
 *
 * Characters are read with our_fgetc(indoc).  Each time the character
 * read equals RE, one further character is read and dropped before the
 * next test (NOTE(review): presumably the second half of a two-character
 * line ending -- confirm).  The first character that fails the RE test is
 * handed back with our_ungetc().
 *
 * Expands to a single do { ... } while (0) statement, so it is safe under
 * an unbraced if/else; invoke it as `STRIP_CRs();`.  our_fgetc,
 * our_ungetc, indoc and RE must be visible at the point of use.
 */
#define STRIP_CRs() \
    do { \
        int strip_crs_ch_; \
        while ((strip_crs_ch_ = our_fgetc(indoc)) == RE) \
            strip_crs_ch_ = our_fgetc(indoc); \
        our_ungetc(strip_crs_ch_, indoc); \
    } while (0)
-
/*
 * General-purpose helper macros.
 *
 * Every macro argument is parenthesised in the expansion, so expressions
 * such as `&node` or `a | b` can be passed safely.  The statement-like
 * macros (FATAL_ERROR, CLEAR_BUF, NULLTERM) each expand to a single
 * do { ... } while (0) statement: invoke them with a trailing semicolon;
 * they behave as one statement under an unbraced if/else.
 *
 * Names used but not defined in this block (error_msg, bufptr,
 * clear_queue, HIGHBIT, EMPTY, TRUE, SPACE, RE, RS, SEPCHAR, NAMELEN)
 * must be visible at the point of use, as must <stdio.h>, <stdlib.h>,
 * <string.h> and <ctype.h> for the library calls.
 */

/* Larger of two values; each argument may be evaluated twice. */
#define MAX(x,y) ((x) > (y) ? (x) : (y))

/*
 * Print error_msg on stdout and terminate with exit status 99.  The
 * message is printed through "%s" so '%' characters in it are literal.
 */
#define FATAL_ERROR() \
    do { \
        fprintf(stdout, "%s", error_msg); \
        exit(99); \
    } while (0)

/* Fill the first `length` bytes of `string` with spaces. */
#define BLANK(string,length) memset((string), ' ', (length))

/* True when the node's id is EMPTY or its contref_attr flag equals TRUE. */
#define EMPTY_CONTENT(ptr) \
    ((ptr)->nodeid == EMPTY || (ptr)->contref_attr == TRUE)

/* Reset bufptr to 0 and call clear_queue(). */
#define CLEAR_BUF() \
    do { \
        bufptr = 0; \
        clear_queue(); \
    } while (0)

/* A token with HIGHBIT set is an end tag; without it, a start tag. */
#define IS_STARTTAG(token) (((token) & HIGHBIT) != HIGHBIT)
#define IS_ENDTAG(token) ((token) & HIGHBIT)

/* Compare token1 with HIGHBIT masked off against token2. */
#define IS_ENDTAG_NOTEQ(token1,token2) (((token1) & ~HIGHBIT) != (token2))
#define IS_ENDTAG_EQ(token1,token2) (((token1) & ~HIGHBIT) == (token2))

/*
 * Terminate a blank-padded name field: scan back from index NAMELEN-1
 * over trailing ' ' characters and store '\0' right after the last
 * non-blank (index 0 when the field is all blanks, index NAMELEN when
 * there is no trailing blank).  `string` is evaluated once and must point
 * to at least NAMELEN+1 writable chars.  An index is used for the scan so
 * no pointer before the start of the field is ever formed.
 */
#define NULLTERM(string) \
    do { \
        char *nullterm_base_ = (string); \
        int nullterm_len_ = NAMELEN; \
        while (nullterm_len_ > 0 && nullterm_base_[nullterm_len_ - 1] == ' ') \
            nullterm_len_--; \
        nullterm_base_[nullterm_len_] = '\0'; \
    } while (0)

/* Codes 32..126 and 128..254 are accepted; 127 and 255 are not. */
#define SGMLCHAR(c) (((c) > 31 && (c) < 127) || ((c) > 127 && (c) < 255))

/* True for SPACE, RE, RS or SEPCHAR. */
#define SEPERATOR(c) \
    ((c) == SPACE || (c) == RE || (c) == RS || (c) == SEPCHAR)

/*
 * True for letters, digits, '.' and '-'.  The value is converted to
 * unsigned char before it reaches <ctype.h>, because passing a negative
 * char value to isalpha()/isdigit() is undefined.
 */
#define NAMECHAR(c) \
    (isalpha((unsigned char)(c)) || isdigit((unsigned char)(c)) || \
     (c) == '.' || (c) == '-')

/* Store '#' in the node's occurind field. */
#define NULLOI(ptr) ((ptr)->occurind = '#')
-
/* Single-bit flags describing what was declared for an element. */
#define STARTMINI_MASK  0x1     /* start tag minimization was declared */
#define ENDMINI_MASK    0x2     /* end tag minimization was declared */
#define INCL_MASK       0x4     /* element was declared for inclusion */
#define EXCL_MASK       0x8     /* element was declared for exclusion */
-
/*
 * Marked section kinds: INCLUDE, RCDATA, CDATA or IGNORE.  The codes are
 * consecutive, starting from 0 for INCLUDE.
 */
#define MS_INCLUDE  (0)
#define MS_RCDATA   (MS_INCLUDE+1)
#define MS_CDATA    (MS_RCDATA+1)
#define MS_IGNORE   (MS_CDATA+1)
-
/* Content kind codes: element content versus any other content. */
#define ELEMENT_CONTENT  0
#define OTHER_CONTENT    1
-
/*
 * We have a problem of sign extension, so we are using -2 instead of the
 * 0xfe from dtd.h.  The value is parenthesised like the other negative
 * constants in this header so it stays a single operand in any expression.
 */
#define OUR_EE (-2)
-
/* Truth constants, supplied only when nothing earlier has defined them. */
#if !defined(TRUE)
#define TRUE (1)
#endif

#if !defined(FALSE)
#define FALSE (0)
#endif
-